# general libraries used
import sys
import pprint
import json
import re
import os
import shutil
import codecs
import time

# specific libraries used
import numpy as np
import pandas as pd
import datetime
import tweepy
from tweepy import *
import psycopg2

# including the SMaSSD support files
sys.path.append("Classes")
sys.path.append(".")

#########################################
# MAIN CODE FOLLOWS
#########################################
from TextTweet import TextTweet
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
import keras.preprocessing.text as kr
from keras.models import model_from_json
from keras.preprocessing.text import Tokenizer
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from sklearn.metrics import confusion_matrix
import random

def log(m):
    """Print ``m`` to stdout, prefixed with the current local timestamp.

    The emitted line has the form ``YYYY-MM-DD HH:MM:SS: <m>``. ``m`` must be
    a string, because it is concatenated directly onto the prefix.
    """
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(stamp + ": " + m)

# Download the NLTK resources requested by this script.
for corpus_name in ('wordnet', 'stopwords'):
    nltk.download(corpus_name)

# Language-specific helpers: a Snowball stemmer and the stop-word set that is
# handed to TextTweet.removeStopwords() during preprocessing.
language = 'english'
stemmer = SnowballStemmer(language)
en_stop = set(stopwords.words(language))

# Accumulates the preprocessed tweets that will be classified.
tweets = list()

# Class labels; the predicted label is out_classes[argmax(prediction)].
num_categories = 2
out_classes = [0, 1]

# Paths of the saved model artefacts, all derived from one common prefix.
# NOTE(review): the prefix ends in "_" and every suffix starts with "_", so
# the resulting names contain a double underscore (e.g.
# "partisan_nn__dict.json") -- confirm this matches the files on disk.
input_file = "partisan_nn_"
input_dictionary = "{}_dict.json".format(input_file)
input_structure = "{}_structure.json".format(input_file)
input_weights = "{}_weights.h5".format(input_file)

# Load the JSON dictionary; later code looks words up in it to obtain the
# values fed to the tokenizer.
with open(input_dictionary, 'r') as dict_file:
    dictionary = json.loads(dict_file.read())

# Read the serialized model architecture (JSON text).
with open(input_structure, 'r') as structure_file:
    loaded_model_json = structure_file.read()

# Rebuild the network from its architecture, then restore the saved weights.
model = model_from_json(loaded_model_json)
model.load_weights(input_weights)

# Open the "shoom" database; the same cursor is reused for every query.
conn = psycopg2.connect(database="shoom")
c = conn.cursor()

# Ids (as strings) of the congress_tweets rows flagged with opp_mentions=1.
c.execute('select tweet_id from congress_tweets where opp_mentions=1')
opp_mentions = {str(flagged[0]): True for flagged in c.fetchall()}

# Fetch every coded tweet that has no nn value yet and preprocess its text.
c.execute("select t.id,t.text,t.original_text from congress_tweets t, coded_tweets d where t.id=d.id and nn is null")
for tweet_id, body, original in c.fetchall():
    # Append the original text, when present, to the tweet's own text.
    text = body
    if original not in (None, ''):
        text += ' ' + original

    tweet = TextTweet(tweet_id, text)
    tweet.tokenize()
    tweet.removeURLs()
    tweet.removeStopwords(en_stop)
    tweet.removeShortwords(3)
    tweet.lemmatizeWords()
    tweet.multiplyMentions(3)

    # Add a marker token for tweets whose id is in the flagged set.
    if str(tweet_id) in opp_mentions:
        tweet.text += ' oppositionmention'
    tweets.append(tweet)

# Classify every collected tweet with the loaded model and store the result in
# coded_tweets.nn ('t' when the predicted class is 1, otherwise 'f').
num_words = len(dictionary)+1
tokenizer = Tokenizer(num_words=num_words)

try:
    for i, tweet in enumerate(tweets, start=1):
        # Map each known word to its dictionary value; unknown words are dropped.
        word_indices = [
            dictionary[w]
            for w in kr.text_to_word_sequence(tweet.text)
            if w in dictionary
        ]
        raw_pred = model.predict(
            tokenizer.sequences_to_matrix([word_indices], mode='binary'))
        proc_pred = out_classes[np.argmax(raw_pred)]
        in_p = 't' if proc_pred == 1 else 'f'

        # Let the driver quote the values instead of concatenating SQL text.
        c.execute("UPDATE coded_tweets SET nn=%s WHERE id=%s",
                  (in_p, tweet.id))

        # Commit in batches of 1000 and log progress.
        if i % 1000 == 0:
            log(str(i))
            conn.commit()

    # Override the prediction for tweets whose URL is marked partisan in
    # either URL table.
    # NOTE(review): these statements write nn=1 while the loop above writes
    # 't'/'f' -- confirm against the column type of coded_tweets.nn that the
    # two encodings are meant to coexist.
    c.execute("UPDATE coded_tweets SET nn=1 WHERE id IN (SELECT tweet_id FROM congress_urls_114 WHERE partisan='t')")
    c.execute("UPDATE coded_tweets SET nn=1 WHERE id IN (SELECT tweet_id FROM congress_urls_115 WHERE partisan='t')")
    conn.commit()
finally:
    # Always release the cursor and connection, even when prediction or a
    # query raises; uncommitted work is discarded in that case.
    c.close()
    conn.close()
